Skip to content

[InstCombine] Fold icmp of gep chain with base #144065

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 2, 2025
Merged

Conversation

nikic
Copy link
Contributor

@nikic nikic commented Jun 13, 2025

Fold icmp between a chain of geps and its base pointer. Previously only a single gep was supported.

This will be extended to handle the case of two gep chains with a common base in a followup.

This helps to avoid regressions after #137297.

@nikic nikic requested a review from dtcxzyw June 13, 2025 12:41
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Jun 13, 2025
@llvmbot
Copy link
Member

llvmbot commented Jun 13, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Nikita Popov (nikic)

Changes

Fold icmp between a chain of geps and its base pointer. Previously only a single gep was supported.

This will be extended to handle the case of two gep chains with a common base in a followup.

This helps to avoid regressions after #137297.


Full diff: https://github.com/llvm/llvm-project/pull/144065.diff

4 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+6-3)
  • (modified) llvm/test/Transforms/InstCombine/getelementptr.ll (+1-1)
  • (modified) llvm/test/Transforms/InstCombine/icmp-gep.ll (+64)
  • (modified) llvm/test/Transforms/InstCombine/pr39908.ll (+3-9)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c112fae351817..60cd93f9e2d42 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -708,10 +708,12 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
     return I;
   };
 
-  Value *PtrBase = GEPLHS->getOperand(0);
-  if (PtrBase == RHS && CanFold(GEPLHS->getNoWrapFlags())) {
+  CommonPointerBase Base = CommonPointerBase::compute(GEPLHS, RHS);
+  if (Base.Ptr == RHS && CanFold(Base.LHSNW)) {
     // ((gep Ptr, OFFSET) cmp Ptr)   ---> (OFFSET cmp 0).
-    Value *Offset = EmitGEPOffset(GEPLHS);
+    Type *IdxTy = DL.getIndexType(GEPLHS->getType());
+    Value *Offset =
+        EmitGEPOffsets(Base.LHSGEPs, Base.LHSNW, IdxTy, /*RewriteGEPs=*/false);
     return NewICmp(GEPLHS->getNoWrapFlags(), Offset,
                    Constant::getNullValue(Offset->getType()));
   }
@@ -750,6 +752,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
 
     // If the base pointers are different, but the indices are the same, just
     // compare the base pointer.
+    Value *PtrBase = GEPLHS->getOperand(0);
     if (PtrBase != GEPRHS->getOperand(0)) {
       bool IndicesTheSame =
           GEPLHS->getNumOperands() == GEPRHS->getNumOperands() &&
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 61236df80bfa6..23e682f5217cd 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -694,7 +694,7 @@ define i32 @test28() nounwind  {
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[T12_REC]] to i64
 ; CHECK-NEXT:    [[T12:%.*]] = getelementptr inbounds [[STRUCT_X:%.*]], ptr [[T45]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[T16:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str1, ptr nonnull [[T12]]) #[[ATTR0]]
-; CHECK-NEXT:    [[T84:%.*]] = icmp eq ptr [[T12]], [[ORIENTATIONS]]
+; CHECK-NEXT:    [[T84:%.*]] = icmp eq i32 [[INDVAR]], 0
 ; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
 ; CHECK-NEXT:    br i1 [[T84]], label [[BB17:%.*]], label [[BB10]]
 ; CHECK:       bb17:
diff --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll
index 7f8f1ae73948d..c2ec3bc83e066 100644
--- a/llvm/test/Transforms/InstCombine/icmp-gep.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll
@@ -757,3 +757,67 @@ define i1 @gep_diff_base_same_indices_nuw_nusw(ptr %x, ptr %y, i64 %z) {
   %cmp = icmp ult ptr %gep1, %gep2
   ret i1 %cmp
 }
+
+define i1 @gep_multiple_eq(ptr %base, i64 %idx, i64 %idx2) {
+; CHECK-LABEL: @gep_multiple_eq(
+; CHECK-NEXT:    [[GEP1_IDX1:%.*]] = add i64 [[IDX:%.*]], [[IDX2:%.*]]
+; CHECK-NEXT:    [[DOTMASK:%.*]] = and i64 [[GEP1_IDX1]], 4611686018427387903
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[DOTMASK]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr i32, ptr %base, i64 %idx
+  %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+  %cmp = icmp eq ptr %gep2, %base
+  ret i1 %cmp
+}
+
+define i1 @gep_multiple_eq_commuted(ptr %base, i64 %idx, i64 %idx2) {
+; CHECK-LABEL: @gep_multiple_eq_commuted(
+; CHECK-NEXT:    [[GEP1_IDX1:%.*]] = add i64 [[IDX:%.*]], [[IDX2:%.*]]
+; CHECK-NEXT:    [[DOTMASK:%.*]] = and i64 [[GEP1_IDX1]], 4611686018427387903
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[DOTMASK]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr i32, ptr %base, i64 %idx
+  %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+  %cmp = icmp eq ptr %base, %gep2
+  ret i1 %cmp
+}
+
+define i1 @gep_mugtiple_ugt_nuw(ptr %base, i64 %idx, i64 %idx2) {
+; CHECK-LABEL: @gep_mugtiple_ugt_nuw(
+; CHECK-NEXT:    [[GEP1_IDX1:%.*]] = sub i64 0, [[IDX2:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IDX:%.*]], [[GEP1_IDX1]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr nuw i32, ptr %base, i64 %idx
+  %gep2 = getelementptr nuw i32, ptr %gep1, i64 %idx2
+  %cmp = icmp ugt ptr %gep2, %base
+  ret i1 %cmp
+}
+
+define i1 @gep_mugtiple_ugt_not_all_nuw(ptr %base, i64 %idx, i64 %idx2) {
+; CHECK-LABEL: @gep_mugtiple_ugt_not_all_nuw(
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr nuw i32, ptr [[BASE:%.*]], i64 [[IDX:%.*]]
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i32, ptr [[GEP1]], i64 [[IDX2:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt ptr [[GEP2]], [[BASE]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr nuw i32, ptr %base, i64 %idx
+  %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+  %cmp = icmp ugt ptr %gep2, %base
+  ret i1 %cmp
+}
+
+define i1 @gep_mugtiple_ugt_inbounds_nusw(ptr %base, i64 %idx, i64 %idx2) {
+; CHECK-LABEL: @gep_mugtiple_ugt_inbounds_nusw(
+; CHECK-NEXT:    [[GEP1_IDX1:%.*]] = add i64 [[IDX:%.*]], [[IDX2:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[GEP1_IDX1]], 2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr inbounds i32, ptr %base, i64 %idx
+  %gep2 = getelementptr nusw i32, ptr %gep1, i64 %idx2
+  %cmp = icmp ugt ptr %gep2, %base
+  ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/pr39908.ll b/llvm/test/Transforms/InstCombine/pr39908.ll
index ca143f417fb27..5d13a331c6d2e 100644
--- a/llvm/test/Transforms/InstCombine/pr39908.ll
+++ b/llvm/test/Transforms/InstCombine/pr39908.ll
@@ -7,9 +7,7 @@ target datalayout = "p:32:32"
 
 define i1 @test(ptr %p, i32 %n) {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[END:%.*]] = getelementptr inbounds [0 x %S], ptr [[P:%.*]], i32 0, i32 [[N:%.*]], i32 0, i32 0
-; CHECK-NEXT:    [[LAST:%.*]] = getelementptr inbounds i8, ptr [[END]], i32 -8
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[LAST]], [[P]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %end = getelementptr inbounds [0 x %S], ptr %p, i32 0, i32 %n, i32 0, i32 0
@@ -22,9 +20,7 @@ define i1 @test(ptr %p, i32 %n) {
 define i1 @test64(ptr %p, i64 %n) {
 ; CHECK-LABEL: @test64(
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[N:%.*]] to i32
-; CHECK-NEXT:    [[END:%.*]] = getelementptr inbounds [0 x %S], ptr [[P:%.*]], i32 0, i32 [[TMP1]], i32 0, i32 0
-; CHECK-NEXT:    [[LAST:%.*]] = getelementptr inbounds i8, ptr [[END]], i32 -8
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[LAST]], [[P]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %end = getelementptr inbounds [0 x %S], ptr %p, i64 0, i64 %n, i32 0, i64 0
@@ -37,9 +33,7 @@ define i1 @test64(ptr %p, i64 %n) {
 define i1 @test64_overflow(ptr %p, i64 %n) {
 ; CHECK-LABEL: @test64_overflow(
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[N:%.*]] to i32
-; CHECK-NEXT:    [[END:%.*]] = getelementptr inbounds [0 x %S], ptr [[P:%.*]], i32 0, i32 [[TMP1]], i32 0, i32 0
-; CHECK-NEXT:    [[LAST:%.*]] = getelementptr inbounds i8, ptr [[END]], i32 -8
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[LAST]], [[P]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %end = getelementptr inbounds [0 x %S], ptr %p, i64 0, i64 %n, i32 0, i64 8589934592

Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LG

Value *Offset = EmitGEPOffset(GEPLHS);
Type *IdxTy = DL.getIndexType(GEPLHS->getType());
Value *Offset =
EmitGEPOffsets(Base.LHSGEPs, Base.LHSNW, IdxTy, /*RewriteGEPs=*/false);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
EmitGEPOffsets(Base.LHSGEPs, Base.LHSNW, IdxTy, /*RewriteGEPs=*/false);
EmitGEPOffsets(Base.LHSGEPs, Base.LHSNW, IdxTy, /*RewriteGEPs=*/true);

Rewrite GEPs to avoid duplicating the arithmetic: dtcxzyw/llvm-opt-benchmark#2510

We can land this later together with

// To avoid duplicating the offset arithmetic, rewrite the GEP to use the
// computed offset.
// TODO: We should probably do this even if there is only one GEP.
bool RewriteGEPs = !Base.LHSGEPs.empty() && !Base.RHSGEPs.empty();

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Created #146100 for just this change.

nikic added 2 commits July 1, 2025 15:21
Fold icmp between a chain of geps and its based pointer. Previously
only a single gep was supported.

This will be extended to handle the case of two gep chains with a
common base in a followup.
@nikic nikic merged commit 83272a4 into llvm:main Jul 2, 2025
7 checks passed
@nikic nikic deleted the gep-icmp-multiple branch July 2, 2025 07:23
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jul 2, 2025

LLVM Buildbot has detected a new failure on builder mlir-nvidia running on mlir-nvidia while building llvm at step 7 "test-build-check-mlir-build-only-check-mlir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/138/builds/15445

Here is the relevant piece of the build log for the reference
Step 7 (test-build-check-mlir-build-only-check-mlir) failure: test (failure)
******************** TEST 'MLIR :: Integration/GPU/CUDA/async.mlir' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-kernel-outlining  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary="format=fatbin"  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-runner    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_cuda_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_async_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_runner_utils.so    --entry-point-result=void -O0  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-kernel-outlining
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt '-pass-pipeline=builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary=format=fatbin
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-runner --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_cuda_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_async_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_runner_utils.so --entry-point-result=void -O0
# .---command stderr------------
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventSynchronize(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# `-----------------------------
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# .---command stderr------------
# | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir:68:12: error: CHECK: expected string not found in input
# |  // CHECK: [84, 84]
# |            ^
# | <stdin>:1:1: note: scanning from here
# | Unranked Memref base@ = 0x588b0ea1e8a0 rank = 1 offset = 0 sizes = [2] strides = [1] data = 
# | ^
# | <stdin>:2:1: note: possible intended match here
# | [42, 42]
# | ^
# | 
# | Input file: <stdin>
# | Check file: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             1: Unranked Memref base@ = 0x588b0ea1e8a0 rank = 1 offset = 0 sizes = [2] strides = [1] data =  
# | check:68'0     X~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ error: no match found
# |             2: [42, 42] 
# | check:68'0     ~~~~~~~~~
# | check:68'1     ?         possible intended match
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants